## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the Premier League table from the local CSV export
pl <- read.csv(
  file = "C:/Users/matth/Documents/Grad School/Spring 2022/STAA 566/Data/pl_table_05_20.csv"
)
# Count how many rows each squad has in the data
table(pl[["Squad"]])
## 
##         Arsenal     Aston Villa Birmingham City       Blackburn       Blackpool 
##              16              13               4               7               1 
##          Bolton     Bournemouth        Brighton         Burnley    Cardiff City 
##               7               5               4               7               2 
##    Charlton Ath         Chelsea  Crystal Palace    Derby County         Everton 
##               2              16               8               1              16 
##          Fulham    Huddersfield       Hull City    Leeds United  Leicester City 
##              11               2               5               1               7 
##       Liverpool Manchester City  Manchester Utd   Middlesbrough   Newcastle Utd 
##              16              16              16               5              14 
##    Norwich City      Portsmouth             QPR         Reading   Sheffield Utd 
##               5               5               3               3               3 
##     Southampton      Stoke City      Sunderland    Swansea City       Tottenham 
##               9              10              11               7              16 
##         Watford       West Brom        West Ham  Wigan Athletic          Wolves 
##               6              11              15               8               6
# Keep only the squads that played in the Premier League for all 16 seasons,
# and keep just the first 11 columns (the remaining columns have no data).
pl.best <- pl[
  pl$Squad %in% c(
    "Arsenal", "Chelsea", "Everton", "Liverpool",
    "Manchester City", "Manchester Utd", "Tottenham"
  ),
  1:11
]

# Key the filtered data by squad so that rows belonging to the same team are
# linked together when the plot is highlighted
pl.highlight <- highlight_key(x = pl.best, key = ~Squad)

# Build the static ggplot: goal difference by year, one color per squad.
# `text = Rk` is not a standard ggplot2 aesthetic; presumably it is mapped so
# the rank is available to the plotly tooltip later on.
pl.gg <- ggplot(pl.highlight, aes(x = Year, y = GD, text = Rk, color = Squad)) +
  # One small solid dot per observation
  geom_point(shape = 20, size = 1.5, alpha = 1) +
  # Thin loess trend line for each squad, drawn without a confidence band
  geom_smooth(method = "loess", se = FALSE, lwd = .5) +
  scale_y_continuous(breaks = seq(from = -20, to = 80, by = 20)) +
  theme_classic(base_size = 12) +
  # Applied after theme_classic() so the legend placement is not overridden
  theme(legend.position = c(0.87, 0.25)) +
  labs(
    title = "Goal Difference of Seven Premier League Teams",
    y = "Goal Difference (goals scored minus goals against)"
  )

# Convert the ggplot into an interactive plotly widget whose tooltip shows the
# squad, goal difference and rank. Hovering turns the highlight on; a relayout
# event turns it off.
pl.plotly.highlight <- highlight(
  ggplotly(pl.gg, tooltip = c("Squad", "GD", "Rk")),
  on = "plotly_hover",
  off = "plotly_relayout"
)
## `geom_smooth()` using formula 'y ~ x'
# Display the interactive plot: naming the object at top level auto-prints it
pl.plotly.highlight

About the Plot

Data Source

All of my data comes from https://fbref.com/en/comps/9/10728/stats/2020-2021-Premier-League-Stats, which compiles data from the English Premier League into downloadable tables. I quality-checked the information against the data on the Premier League’s official website, and it matched.

What I want to convey

I took the seven teams that played in the Premier League in all 16 seasons for which I pulled data. What I wanted to show is their trends over the seasons while also factoring in each team’s end-of-season rank. I had hoped to take into account the amount that each team spent, to see whether there was a correlation between budget and end-of-season result, but there were too many other factors for me to account for (injuries, playing time of each player, weather conditions in games, etc.).

Functionality

The first thing that I added was the smooth lines to show general trends for each team. I had originally done just a simple connect-the-dots, but some of the teams had very inconsistent Goal Difference (see Manchester City from 2010 - 2015), so I decided to go with smooth lines for visual purposes.

The formatting that I chose was really just aesthetics to make sure that the plot wasn’t too painful to look at. The default smooth lines were too wide, so I narrowed them. I also chose the colors to make them distinct enough for someone who is colorblind (like me) to be able to distinguish them.

I also added the highlight feature, which highlights a team across all years when you hover over it. This feature was needed because, at first glance, the graph looks really messy; the highlight makes the graph legible.